Load required libraries, installing them beforehand if necessary, for use in the subsequent steps:
# Packages required by the analysis, listed in the order in which they are
# attached (attachment order decides which package wins on name clashes).
required_packages <- c(
  "tidyr", "dplyr", "ggplot2", "stringr", "lubridate", "ggdark", "ggExtra",
  "schoolmath", "plotly", "tufte", "ggthemes", "data.table", "gapminder",
  "ggalt"
)

# Install each package only if it cannot be loaded, then attach it.
# requireNamespace() replaces the `%in% installed.packages()` test, which
# rebuilds the full table of installed packages on every call.
for (pkg in required_packages) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
  library(pkg, character.only = TRUE)
}
Colour definition [ABB: why not also define here all the colours used in the ggplots (e.g. "#f03b20", "#cccccc", …)?]
# Colour constants for the report
fill_color       <- "#ffffff"  # white
decoration_color <- "#b6b5b5"  # medium gray
text_color       <- "#000000"  # black
main1_color      <- "#FF0000"  # primary red
main2_color      <- "#ff8d00"  # orange yellow
brewercolors     <- "YlOrRd"   # ColorBrewer palette name
Theme definition
# Light theme for the report: starts from theme_tufte() and blanks out grid
# lines, ticks, borders and backgrounds, keeping only thin gray axis lines.
bigmac_theme_light <- theme_tufte() +
  theme(
    # --- text ---
    plot.title   = element_text(size = 10, hjust = 0.2, color = text_color),
    axis.title.x = element_text(size = 8, hjust = 0.5, color = text_color),
    # [ABB: Check with team] - y title is drawn horizontally (angle = 0)
    axis.title.y = element_text(angle = 0, size = 8, hjust = 0.5, color = text_color),
    axis.text    = element_text(colour = text_color, size = 6),
    strip.text   = element_text(size = 10, color = decoration_color),
    legend.text  = element_text(size = 6, hjust = 0.5, color = text_color),
    # --- axes ---
    axis.line  = element_line(colour = decoration_color, size = 0.3),
    axis.ticks = element_blank(),
    # --- panel, strips and backgrounds ---
    panel.grid       = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border     = element_blank(),
    panel.background = element_blank(),
    strip.background = element_blank(),
    plot.background  = element_blank(),
    # --- legend: placed inside the panel, no key boxes, no title ---
    legend.position = c(0.8, 0.9),
    legend.key      = element_blank(),
    legend.title    = element_blank()
  )

# Use this theme as the default for every ggplot created afterwards
theme_set(bigmac_theme_light)
Prepare the libraries needed for appropriate table rendering:
# Packages used for table rendering; install when missing, then attach.
# requireNamespace() is used for the check instead of scanning the whole
# installed.packages() table.
table_packages <- c("knitr", "kableExtra")
for (pkg in table_packages) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
  library(pkg, character.only = TRUE)
}
Read the .csv file into a data.frame, select columns of interest and inspect the initial data structure:
# Load the raw Big Mac Index file (path is relative to the working directory)
bigmac <- read.csv("Big Mac Index.csv")
# Keep only the columns used in the rest of the analysis
bigmac <- bigmac[, c("date", "name", "dollar_price", "USD_adjusted", "iso_a3", "adj_price")]
# Preview the first rows; FALSE is spelled out because `F` can be reassigned
kable_styling(kable(head(bigmac)), full_width = FALSE)
| date | name | dollar_price | USD_adjusted | iso_a3 | adj_price |
|---|---|---|---|---|---|
| 2000-04-01 | Argentina | 2.500000 | NA | ARG | NA |
| 2000-04-01 | Australia | 1.541667 | NA | AUS | NA |
| 2000-04-01 | Brazil | 1.648045 | NA | BRA | NA |
| 2000-04-01 | Canada | 1.938775 | NA | CAN | NA |
| 2000-04-01 | Switzerland | 3.470588 | NA | CHE | NA |
| 2000-04-01 | Chile | 2.451362 | NA | CHL | NA |
dim(bigmac)
## [1] 1162 6
str(bigmac)
## 'data.frame': 1162 obs. of 6 variables:
## $ date : Factor w/ 29 levels "2000-04-01","2001-04-01",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ name : Factor w/ 57 levels "Argentina","Australia",..: 1 2 5 7 48 8 9 13 14 16 ...
## $ dollar_price: num 2.5 1.54 1.65 1.94 3.47 ...
## $ USD_adjusted: num NA NA NA NA NA NA NA NA NA NA ...
## $ iso_a3 : Factor w/ 56 levels "ARE","ARG","AUS",..: 2 3 6 7 8 9 10 13 14 16 ...
## $ adj_price : num NA NA NA NA NA NA NA NA NA NA ...
Coerce column date from factor (as imported by read.csv, see the str() output above) to Date:
bigmac$date <- as.Date(bigmac$date)
Add column continent via the library gapminder:
# Work with a data.table from here on
bigmac <- data.table(bigmac)
# Country -> continent lookup from the gapminder dataset, one row per country.
# Columns are selected by name so the lookup does not depend on column order.
continents <- unique(gapminder[, c("country", "continent")])
# merge() keeps only matching rows by default: any row whose `name` has no
# identically spelled `country` in gapminder is dropped at this step.
bigmac <- merge(bigmac, continents, by.x = "name", by.y = "country")
Remove NAs and check:
bigmac <- bigmac[complete.cases(bigmac), ]
sum(is.na(bigmac))
## [1] 0
Inspect processed dataset:
dim(bigmac)
## [1] 479 7
str(bigmac)
## Classes 'data.table' and 'data.frame': 479 obs. of 7 variables:
## $ name : Factor w/ 57 levels "Argentina","Australia",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ date : Date, format: "2011-07-01" "2012-01-01" ...
## $ dollar_price: num 4.84 4.64 4.16 3.82 3.88 ...
## $ USD_adjusted: num 1.011 0.756 0.489 0.316 0.286 ...
## $ iso_a3 : Factor w/ 56 levels "ARE","ARG","AUS",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ adj_price : num 3.15 2.99 3.01 3.26 3.12 ...
## $ continent : Factor w/ 5 levels "Africa","Americas",..: 2 2 2 2 2 2 2 2 2 2 ...
## - attr(*, ".internal.selfref")=<externalptr>
## - attr(*, "sorted")= chr "name"
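Summarise the bigmac dataset by:

- year and country,
- continent and date,

and filter by:

- country (United States, averaged per date),
- continent (Europe),
- a single year (averaged per country),

storing the result of every case in a separate data.frame: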
# Mean dollar price per calendar year and country
bigmac_year <- bigmac %>%
  group_by(year(date), name) %>%
  summarize(AvgDollarPrice = mean(dollar_price))
kable_styling(kable(head(bigmac_year)), full_width = FALSE)
| year(date) | name | AvgDollarPrice |
|---|---|---|
| 2011 | Argentina | 4.839685 |
| 2011 | Australia | 4.943724 |
| 2011 | Brazil | 6.162429 |
| 2011 | Canada | 5.000793 |
| 2011 | Chile | 3.997839 |
| 2011 | China | 2.273080 |
# Mean dollar price per continent and survey date
bigmac_continent <- bigmac %>%
  group_by(continent, date) %>%
  summarize(AvgDollarPrice = mean(dollar_price))
kable_styling(kable(head(bigmac_continent)), full_width = FALSE)
| continent | date | AvgDollarPrice |
|---|---|---|
| Africa | 2011-07-01 | 2.618379 |
| Africa | 2012-01-01 | 2.510627 |
| Africa | 2012-07-01 | 2.496640 |
| Africa | 2013-01-01 | 2.351654 |
| Africa | 2013-07-01 | 2.314576 |
| Africa | 2014-01-01 | 2.296257 |
# United States only: mean dollar price per survey date
# (used as the highlighted reference line in the plots below)
bigmac_usa <- bigmac %>%
  filter(name == "United States") %>%
  group_by(date) %>%
  summarize(AvgDollarPrice = mean(dollar_price))
kable_styling(kable(head(bigmac_usa)), full_width = FALSE)
| date | AvgDollarPrice |
|---|---|
| 2011-07-01 | 4.065000 |
| 2012-01-01 | 4.197220 |
| 2012-07-01 | 4.327500 |
| 2013-01-01 | 4.367396 |
| 2013-07-01 | 4.556667 |
| 2014-01-01 | 4.624167 |
# All rows (every column kept) for countries on the European continent
bigmac_europe <- filter(bigmac, continent == "Europe")
kable_styling(kable(head(bigmac_europe)), full_width = FALSE)
| name | date | dollar_price | USD_adjusted | iso_a3 | adj_price | continent |
|---|---|---|---|---|---|---|
| Czech Republic | 2011-07-01 | 4.072401 | 0.452 | CZE | 3.670336 | Europe |
| Czech Republic | 2012-01-01 | 3.448013 | 0.144 | CZE | 3.417339 | Europe |
| Czech Republic | 2012-07-01 | 3.341410 | 0.057 | CZE | 3.400976 | Europe |
| Czech Republic | 2013-01-01 | 3.722159 | 0.137 | CZE | 3.673327 | Europe |
| Czech Republic | 2013-07-01 | 3.490596 | 0.024 | CZE | 3.526358 | Europe |
| Czech Republic | 2014-01-01 | 3.473215 | 0.027 | CZE | 3.584248 | Europe |
# Per-country averages for 2014.
# The filter used to select "2012" (as a string), which contradicted both the
# name of this object and the "BigMac Price 2014" title of the plot built
# from it; it now selects 2014 and compares against a number.
bigmac_2014 <- bigmac %>%
  filter(year(date) == 2014) %>%
  group_by(name) %>%
  summarize(
    AvgDollarPrice = mean(dollar_price),
    AvgDollarAdjusted = mean(USD_adjusted)
  ) %>%
  # Up: factor flag from is.positive() on the averaged USD_adjusted value;
  # it drives the point colour in the plot
  mutate(Up = as.factor(is.positive(AvgDollarAdjusted)))
kable_styling(kable(head(bigmac_2014)), full_width = FALSE)
| name | AvgDollarPrice | AvgDollarAdjusted | Up |
|---|---|---|---|
| Argentina | 4.398785 | 0.6225 | TRUE |
| Australia | 4.808358 | 0.0460 | TRUE |
| Brazil | 5.307322 | 0.9105 | TRUE |
| Canada | 4.827628 | 0.1435 | TRUE |
| Chile | 4.104388 | 0.4525 | TRUE |
| China | 2.444080 | -0.0315 | FALSE |
Additional preprocessing [ABB: Check where this fits into the overall workflow] [ABB: Check compatibility code snippet below vs rest of code (adj_price is not used anywhere else? NAs?)]
# Build the per-country table behind the dumbbell plot: raw dollar price vs.
# adjusted price for a single survey date.
# The raw file is read again because `bigmac` has already been filtered above.
bigmac4 <- read.csv("Big Mac Index.csv")
bigmac5 <- bigmac4[, c("name", "date", "dollar_price")]
bigmac6 <- bigmac4[, c("name", "date", "adj_price")]

# Reshape to wide format: one row per country, one column per date.
# dcast() receives a data.table and an explicit value.var: passing a plain
# data.frame relied on a deprecated redirect to reshape2 that is announced to
# become an error, and the value column was only guessed. The results are
# converted back to data.frame so the subsetting and merge() below behave as
# they did before.
bigmac7 <- as.data.frame(
  dcast(as.data.table(bigmac5), name ~ date, value.var = "dollar_price")
)
bigmac8 <- as.data.frame(
  dcast(as.data.table(bigmac6), name ~ date, value.var = "adj_price")
)

# Column 1 is the country name; column 30 is the 29th date column.
# NOTE(review): the "_2018" names assume that this date falls in 2018 and is
# the most recent one in the file - confirm against the CSV.
bigmac_USprice <- bigmac7[, c(1, 30)]
bigmac_adjusted <- bigmac8[, c(1, 30)]
names(bigmac_USprice)[2] <- "USPrice_2018"
names(bigmac_adjusted)[2] <- "adjusted_2018"

# Join both prices per country and drop countries missing either value
Merg_data <- merge(bigmac_USprice, bigmac_adjusted, by = "name")
Merg_data <- Merg_data[complete.cases(Merg_data), ] # ABB: [Check if this line is ok]

# Order countries by descending dollar price and freeze that order in the
# factor levels so the plot axis follows it
Merg_data <- arrange(Merg_data, desc(USPrice_2018))
levels <- Merg_data$name
Merg_data$name <- factor(Merg_data$name, levels = levels)
# Every country as a faint gray line, with the United States highlighted
ggplot() +
  geom_line(
    data = bigmac,
    mapping = aes(x = date, y = dollar_price, group = name),
    color = "#cccccc", alpha = 0.7, lwd = 0.25
  ) +
  geom_line(
    data = bigmac_usa,
    mapping = aes(x = date, y = AvgDollarPrice),
    color = "#f03b20", lwd = 0.35, show.legend = FALSE
  ) +
  labs(title = "The BigMac Index", x = "", y = "$")
# Countries in gray, continent averages in colour, United States dashed
ggplot() +
  geom_line(
    data = bigmac,
    mapping = aes(date, dollar_price, group = name),
    color = "#cccccc", alpha = 0.7, lwd = 0.25
  ) +
  geom_line(
    data = bigmac_continent,
    mapping = aes(date, AvgDollarPrice, color = continent),
    lwd = 0.5, show.legend = TRUE
  ) +
  geom_line(
    data = bigmac_usa,
    mapping = aes(date, AvgDollarPrice),
    lwd = 0.9, show.legend = FALSE, color = "#f03b20", linetype = "dashed"
  ) +
  # One distinct colour per continent. The palette used to list "#E67E22"
  # twice, which made two continent lines indistinguishable.
  scale_color_manual(
    values = c("#FDD835", "#F39C12", "#E67E22", "#F5B041", "#D35400")
  ) +
  labs(title = "The BigMac Index", x = "", y = "$")
# One panel per continent: country lines in gray, United States in red.
# The `name` column is removed from the country layer and lines are grouped
# by ISO code instead; bigmac_usa carries no `continent` column.
ggplot() +
  geom_line(
    data = transform(bigmac, name = NULL),
    mapping = aes(x = date, y = dollar_price, group = iso_a3),
    colour = decoration_color, alpha = 0.7, lwd = 0.2
  ) +
  geom_line(
    data = bigmac_usa,
    mapping = aes(x = date, y = AvgDollarPrice),
    color = "#f03b20", lwd = 0.3, show.legend = FALSE
  ) +
  facet_wrap(~ continent) +
  theme(strip.background = element_blank(), strip.placement = "outside") +
  labs(x = "", y = "$")
# Tufte-style box plot of the continent-level average prices.
# Uses the shared main2_color constant (same value as the former literal
# '#ff8d00') so the palette is defined in one place.
ggplot() +
  geom_tufteboxplot(
    data = bigmac_continent,
    mapping = aes(continent, AvgDollarPrice),
    color = main2_color
  ) +
  xlab("") +
  ylab("$")
# European countries in gray with the United States line on top
ggplot() +
  geom_line(
    data = bigmac_europe,
    mapping = aes(x = date, y = dollar_price, group = name),
    color = decoration_color, alpha = 0.7, lwd = 0.2
  ) +
  geom_line(
    data = bigmac_usa,
    mapping = aes(x = date, y = AvgDollarPrice),
    color = "#f03b20", lwd = 0.3, show.legend = FALSE
  ) +
  theme(strip.background = element_blank(), strip.placement = "outside") +
  labs(title = "Europe", x = "", y = "$")
# One panel per European country; bigmac_usa has no `name` column, so its
# line is not tied to a single panel
ggplot() +
  geom_line(
    data = bigmac_europe,
    mapping = aes(x = date, y = dollar_price, group = name),
    color = decoration_color, alpha = 0.8, lwd = 0.2
  ) +
  geom_line(
    data = bigmac_usa,
    mapping = aes(x = date, y = AvgDollarPrice),
    color = "#f03b20", lwd = 0.3, show.legend = FALSE
  ) +
  facet_wrap(~ name) +
  theme(strip.background = element_blank(), strip.placement = "outside") +
  labs(title = "Price of a BigMac", x = "", y = "$")
# Interactive scatter of the per-country average USD_adjusted value, coloured
# by the Up flag. The `text` aesthetic only feeds the plotly tooltip.
p <- ggplot(
  bigmac_2014,
  aes(name, AvgDollarAdjusted, color = Up, text = paste(name, '\n', AvgDollarAdjusted))
) +
  geom_point(show.legend = FALSE, size = 2, alpha = 0.75) +
  # Shared palette constants (same values as the former literals
  # '#ff8d00' and '#FF0000'), assigned to the Up levels in order
  scale_color_manual(values = c(main2_color, main1_color)) +
  theme(axis.title.x = element_blank(), axis.text.x = element_blank()) +
  labs(title = "BigMac Price 2014", x = "", y = "$") +
  # Zero reference line
  geom_hline(yintercept = 0, alpha = .5, lwd = 0.3, color = decoration_color)
ggplotly(p, tooltip = c("text")) %>% layout(showlegend = FALSE)
Additionally: dumbbell plot
# Dumbbell plot: for each country, dollar price (x) vs. adjusted price (xend)
ggplot(Merg_data, aes(x = USPrice_2018, xend = adjusted_2018, y = name)) +
  labs(x = "Price", y = "") +
  # Dashed reference line at the mean dollar price
  geom_vline(
    xintercept = mean(Merg_data$USPrice_2018, na.rm = TRUE),
    color = decoration_color, linetype = "dashed"
  ) +
  geom_dumbbell(aes(y = name), color = main2_color, colour_xend = "red", size_xend = 2) +
  # Hand-placed legend entry on the "Egypt" row. annotate() draws it once;
  # the former geom_text()/geom_point() layers with data = Merg_data and
  # constant aesthetics drew one overlapping copy per data row.
  annotate(
    "text", x = 5.5, y = "Egypt", label = "GDP Adjusted Price ($)",
    color = "red", hjust = 0, size = 3
  ) +
  annotate("point", x = 5.4, y = "Egypt", color = "red", size = 2)